packages <- c("CIMseq", "CIMseq.data", "tidyverse", "ggthemes")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)

##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
#rename classes
renameClasses <- function(oldClasses) {
  case_when(
    oldClasses == "10" ~ "SI.TA.1",
    oldClasses == "11" ~ "SI.Lgr5.Mki67.low",
    oldClasses == "13" ~ "SI.Goblet",
    oldClasses == "14" ~ "SI.TA.2",
    oldClasses == "15" ~ "SI.Lgr5",
    oldClasses == "16" ~ "SI.Enteroendocrine",
    oldClasses == "17" ~ "SI.Enterocytes",
    oldClasses == "19" ~ "SI.Tufft",
    oldClasses == "20" ~ "SI.Lgr5.Mki67.low.singelton",
    oldClasses == "22" ~ "SI.Paneth",
    oldClasses == "23" ~ "SI.Goblet.Mki67",
    oldClasses == "24" ~ "Blood",
    oldClasses == "4" ~ "SI.Lgr5.Mki67.high",
    oldClasses == "42" ~ "C.Proximal.Lgr5",
    oldClasses == "43" ~ "C.Proximal.TA",
    oldClasses == "44" ~ "C.Proximal.colonocytes",
    oldClasses == "45" ~ "C.Proximal.Lgr5.Mki67",
    oldClasses == "46" ~ "C.Proximal.goblet",
    oldClasses == "47" ~ "C.Proximal.Enteroendocrine",
    oldClasses == "48" ~ "C.Proximal.Tufft",
    oldClasses == "49" ~ "C.Distal.Lgr5",
    oldClasses == "50" ~ "C.Distal.Goblet",
    oldClasses == "51" ~ "C.Distal.TA",
    oldClasses == "52" ~ "C.Distal.Lgr5.Mki67",
    oldClasses == "53" ~ "C.Distal.Goblet.Fos",
    oldClasses == "54" ~ "C.Distal.Goblet.Plet1",
    oldClasses == "55" ~ "C.Distal.colonocytes",
    oldClasses == "56" ~ "C.Distal.Goblet.Mki67",
    oldClasses == "57" ~ "C.Distal.Tufft",
    oldClasses == "58" ~ "C.Distal.Enteroendocrine",
    TRUE ~ "error"
  )
}

getData(cObjSng, "classification") <- renameClasses(getData(cObjSng, "classification"))
fractions <- getData(sObj, "fractions")
colnames(fractions) <- renameClasses(colnames(fractions))
sObj@fractions <- fractions

Fig 1: Classes

plotUnsupervisedClass(cObjSng, cObjMul)

Fig 2: Cell type gene expression

plotUnsupervisedMarkers(
  cObjSng, cObjMul,
  c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1", "Alpi", "Lyz1"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)

Fig 3: Cell cycle and architecture marker

plotUnsupervisedMarkers(
  cObjSng, cObjMul, c("Mki67", "Plet1", "Junb"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)

Fig 4: Plates

plotUnsupervisedClass(cObjSng, cObjMul) %>%
  plotData() %>%
  inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
  ggplot() +
  geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = unique_key)) +
  ggthemes::theme_few() +
  scale_colour_manual(values = col40())

Fig 5: Mice

plotUnsupervisedClass(cObjSng, cObjMul) %>%
  plotData() %>%
  inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
  mutate(subject_id = as.character(subject_id)) %>%
  ggplot() +
  geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_id), alpha = 0.3) +
  ggthemes::theme_few() +
  scale_colour_manual(values = col40())

Fig 6: Age

plotUnsupervisedClass(cObjSng, cObjMul) %>%
  plotData() %>%
  inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
  mutate(subject_age = as.character(subject_age)) %>%
  ggplot() +
  geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_age)) +
  ggthemes::theme_few() +
  scale_colour_manual(values = col40())

Fig 7: Connections per multiplet

adj <- adjustFractions(cObjSng, cObjMul, sObj)
table(apply(adj, 1, sum)) / nrow(adj) * 100
## 
##          0          1          2          3          4          5 
##  7.2727273 29.3048128 37.0053476 18.7165775  5.4545455  1.7112299 
##          6 
##  0.5347594

Fig 8: Fraction histogram

tibble(fractions = c(fractions)) %>%
  ggplot() +
  geom_histogram(aes(fractions), binwidth = 0.01) +
  theme_bw()

Fig 9: Detected cell types vs. cost

tibble(
  nCellTypes = apply(adj, 1, sum),
  cost = getData(sObj, "costs")
) %>%
  ggplot() +
  geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
  scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
  theme_bw()

Fig 10: Estimated cell numbers vs. cost

tibble(
  sample = names(getData(sObj, "costs")),
  cost = unname(getData(sObj, "costs"))
) %>%
  inner_join(
    select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber), 
    by = "sample"
  ) %>%
  mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number"
    #breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
  ) +
  theme_bw()

Fig 11: Estimated cell number vs. Detected cell number

ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections %>% 
  tibble(
    sample = names(.), 
    detectedConnections = .
  ) %>% 
  inner_join(ercc) %>% 
  mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(ercc$estimatedCellNumber))
  ) +
  scale_y_continuous(
    name = "Detected cell number",
    breaks = 0:max(round(nConnections))
  ) +
  theme_bw()
## Joining, by = "sample"

Fig 12: Detected cell number vs. Total counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.counts = colSums(getData(cObjMul, "counts"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total counts") +
  theme_bw()

Fig 13: Detected cell number vs. Total ERCC counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.ercc = colSums(getData(cObjMul, "counts.ercc"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total ERCC counts") +
  theme_bw()

Fig 14: Connections

plotSwarmCircos(sObj, cObjSng, cObjMul, classOrder = c(
  "SI.Goblet.Mki67", "SI.Goblet", "SI.Paneth", "SI.Lgr5", "SI.Lgr5.Mki67.low", 
  "SI.Lgr5.Mki67.low.singelton", "SI.Lgr5.Mki67.high", "SI.TA.1", "SI.TA.2", "SI.Enterocytes",
  "SI.Enteroendocrine", "SI.Tufft", "Blood",
  "C.Proximal.Lgr5", "C.Proximal.Lgr5.Mki67", "C.Proximal.TA", "C.Proximal.colonocytes",
  "C.Distal.Lgr5", "C.Distal.Lgr5.Mki67", "C.Distal.TA", "C.Distal.colonocytes",
  "C.Proximal.goblet", "C.Distal.Goblet", "C.Distal.Goblet.Fos", "C.Distal.Goblet.Plet1", "C.Distal.Goblet.Mki67",
  "C.Proximal.Enteroendocrine", "C.Distal.Enteroendocrine", "C.Proximal.Tufft", "C.Distal.Tufft"
))

Fig 15: Filtered

Only detected duplicates and triplicates.
Only ERCC estimated cell number <= 4.
Weight cutoff = 5.

adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep1 <- rs == 2 | rs == 3
keep2 <- samples %in% filter(estimateCells(cObjSng, cObjMul), estimatedCellNumber <= 4)$sample

plotSwarmCircos(
  filterSwarm(sObj, keep1 & keep2), cObjSng, cObjMul, weightCut = 5, 
  classOrder = c(
  "SI.Goblet.Mki67", "SI.Goblet", "SI.Paneth", "SI.Lgr5", "SI.Lgr5.Mki67.low", 
  "SI.Lgr5.Mki67.low.singelton", "SI.Lgr5.Mki67.high", "SI.TA.1", "SI.TA.2", "SI.Enterocytes",
  "SI.Enteroendocrine", "SI.Tufft", "Blood",
  "C.Proximal.Lgr5", "C.Proximal.Lgr5.Mki67", "C.Proximal.TA", "C.Proximal.colonocytes",
  "C.Distal.Lgr5", "C.Distal.Lgr5.Mki67", "C.Distal.TA", "C.Distal.colonocytes",
  "C.Proximal.goblet", "C.Distal.Goblet", "C.Distal.Goblet.Fos", "C.Distal.Goblet.Plet1", "C.Distal.Goblet.Mki67",
  "C.Proximal.Enteroendocrine", "C.Distal.Enteroendocrine", "C.Proximal.Tufft", "C.Distal.Tufft"
))